# Preview the first six rows of the raw crime data (output recorded below)
head(crime420co4)
## category persistent_id date lat long street_id
## 1 anti-social-behaviour 2023-01 51.88306 0.909136 2153366
## 2 anti-social-behaviour 2023-01 51.90124 0.901681 2153173
## 3 anti-social-behaviour 2023-01 51.88907 0.897722 2153077
## 4 anti-social-behaviour 2023-01 51.89122 0.901988 2153186
## 5 anti-social-behaviour 2023-01 51.89416 0.895433 2153012
## 6 anti-social-behaviour 2023-01 51.88050 0.909014 2153379
## street_name context id location_type
## 1 On or near Military Road NA 107596596 Force
## 2 On or near NA 107596646 Force
## 3 On or near Culver Street West NA 107595950 Force
## 4 On or near Ryegate Road NA 107595953 Force
## 5 On or near Market Close NA 107595979 Force
## 6 On or near Lisle Road NA 107595985 Force
## location_subtype outcome_status
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
# Preview the first six rows of the raw weather-station data (output recorded below)
head(temp180co4)
## station_ID Date TemperatureCAvg TemperatureCMax TemperatureCMin TdAvgC
## 1 3590 2023-12-31 8.7 10.6 4.4 7.2
## 2 3590 2023-12-30 6.6 9.7 4.4 4.2
## 3 3590 2023-12-29 9.9 11.4 6.9 6.0
## 4 3590 2023-12-28 9.9 11.5 4.0 7.5
## 5 3590 2023-12-27 5.8 10.6 3.9 3.7
## 6 3590 2023-12-26 9.8 12.7 6.3 7.6
## HrAvg WindkmhDir WindkmhInt WindkmhGust PresslevHp Precmm TotClOct lowClOct
## 1 89.6 S 25.0 63.0 999.0 6.2 8.0 8.0
## 2 85.5 WSW 22.7 50.0 1006.9 0.4 4.6 6.5
## 3 77.2 SW 32.8 61.2 1003.6 0.8 6.5 6.7
## 4 84.6 SSW 32.2 70.4 1003.2 2.8 6.8 7.1
## 5 86.4 SW 13.2 37.1 1016.4 2.0 4.0 6.9
## 6 86.9 WSW 23.5 46.3 1006.2 4.4 6.5 7.4
## SunD1h VisKm PreselevHp SnowDepcm
## 1 0.0 26.3 NA NA
## 2 1.1 48.3 NA NA
## 3 0.1 26.7 NA NA
## 4 0.0 25.1 NA NA
## 5 3.2 30.1 NA NA
## 6 0.0 45.8 NA NA
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(wordcloud2)
library(DT)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# Inspect column names and types of the raw crime data before cleaning
str(crime420co4)
## 'data.frame': 6878 obs. of 12 variables:
## $ category : chr "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" ...
## $ persistent_id : chr "" "" "" "" ...
## $ date : chr "2023-01" "2023-01" "2023-01" "2023-01" ...
## $ lat : num 51.9 51.9 51.9 51.9 51.9 ...
## $ long : num 0.909 0.902 0.898 0.902 0.895 ...
## $ street_id : int 2153366 2153173 2153077 2153186 2153012 2153379 2153105 2153541 2152937 2153107 ...
## $ street_name : chr "On or near Military Road" "On or near " "On or near Culver Street West" "On or near Ryegate Road" ...
## $ context : logi NA NA NA NA NA NA ...
## $ id : int 107596596 107596646 107595950 107595953 107595979 107595985 107596603 107596291 107596305 107596453 ...
## $ location_type : chr "Force" "Force" "Force" "Force" ...
## $ location_subtype: chr "" "" "" "" ...
## $ outcome_status : chr NA NA NA NA ...
# Inspect column names and types of the raw weather data before cleaning
str(temp180co4)
## 'data.frame': 365 obs. of 18 variables:
## $ station_ID : int 3590 3590 3590 3590 3590 3590 3590 3590 3590 3590 ...
## $ Date : chr "2023-12-31" "2023-12-30" "2023-12-29" "2023-12-28" ...
## $ TemperatureCAvg: num 8.7 6.6 9.9 9.9 5.8 9.8 12.5 10 9.6 10 ...
## $ TemperatureCMax: num 10.6 9.7 11.4 11.5 10.6 12.7 14.3 12 10.8 12.6 ...
## $ TemperatureCMin: num 4.4 4.4 6.9 4 3.9 6.3 9.5 8.4 8.1 8.1 ...
## $ TdAvgC : num 7.2 4.2 6 7.5 3.7 7.6 10.1 7 6.5 6.2 ...
## $ HrAvg : num 89.6 85.5 77.2 84.6 86.4 86.9 85.3 81.5 81.2 78.2 ...
## $ WindkmhDir : chr "S" "WSW" "SW" "SSW" ...
## $ WindkmhInt : num 25 22.7 32.8 32.2 13.2 23.5 34.1 32.7 34.1 37.5 ...
## $ WindkmhGust : num 63 50 61.2 70.4 37.1 46.3 72.3 61.2 68.6 77.8 ...
## $ PresslevHp : num 999 1007 1004 1003 1016 ...
## $ Precmm : num 6.2 0.4 0.8 2.8 2 4.4 0.8 0.8 0 2 ...
## $ TotClOct : num 8 4.6 6.5 6.8 4 6.5 7.8 5 8 7.5 ...
## $ lowClOct : num 8 6.5 6.7 7.1 6.9 7.4 7.8 6.7 8 7.5 ...
## $ SunD1h : num 0 1.1 0.1 0 3.2 0 0 2.9 0 1.4 ...
## $ VisKm : num 26.3 48.3 26.7 25.1 30.1 45.8 61.8 72.9 69.4 34.3 ...
## $ PreselevHp : logi NA NA NA NA NA NA ...
## $ SnowDepcm : int NA NA NA NA NA NA NA NA NA NA ...
# Count NA cells across all columns of each raw data frame
sum(is.na(crime420co4))
## [1] 7555
sum(is.na(temp180co4))
## [1] 851
# Clean the crime data on a copy so the raw `crime420co4` stays untouched.
# (stringr and lubridate are attached at the top of the script.)
cleaned_crimeco4 <- crime420co4

# Flag the numeric columns
numeric_columns <- vapply(cleaned_crimeco4, is.numeric, logical(1))

# Mean-impute missing values in numeric *measurement* columns only.
# Identifiers (street_id, id) and coordinates (lat, long) are deliberately
# excluded: an averaged ID, or a coordinate replaced by a mean rounded to one
# decimal place, would be fabricated data rather than a sensible estimate.
non_imputable <- c("street_id", "id", "lat", "long")
impute_columns <- names(cleaned_crimeco4)[numeric_columns]
impute_columns <- impute_columns[!impute_columns %in% non_imputable]
for (column_name in impute_columns) {
  column_values <- cleaned_crimeco4[[column_name]]
  column_values[is.na(column_values)] <-
    round(mean(column_values, na.rm = TRUE), 1)
  cleaned_crimeco4[[column_name]] <- column_values
}

# Missing outcomes become an explicit label instead of NA
cleaned_crimeco4$outcome_status[is.na(cleaned_crimeco4$outcome_status)] <- "No Information"

# Normalise street names: lower-case, then strip surrounding whitespace
cleaned_crimeco4$street_name <- str_trim(str_to_lower(cleaned_crimeco4$street_name))

# Parse the "YYYY-MM" strings; ym() yields the first day of each month, so
# `date` has month resolution only
cleaned_crimeco4$date <- ym(cleaned_crimeco4$date)

# Drop the columns that carry no information (context, location_subtype)
cleaned_crimeco4 <- subset(cleaned_crimeco4, select = -c(context, location_subtype))
head(cleaned_crimeco4)
## category persistent_id date lat long street_id
## 1 anti-social-behaviour 2023-01-01 51.88306 0.909136 2153366
## 2 anti-social-behaviour 2023-01-01 51.90124 0.901681 2153173
## 3 anti-social-behaviour 2023-01-01 51.88907 0.897722 2153077
## 4 anti-social-behaviour 2023-01-01 51.89122 0.901988 2153186
## 5 anti-social-behaviour 2023-01-01 51.89416 0.895433 2153012
## 6 anti-social-behaviour 2023-01-01 51.88050 0.909014 2153379
## street_name id location_type outcome_status
## 1 on or near military road 107596596 Force No Information
## 2 on or near 107596646 Force No Information
## 3 on or near culver street west 107595950 Force No Information
## 4 on or near ryegate road 107595953 Force No Information
## 5 on or near market close 107595979 Force No Information
## 6 on or near lisle road 107595985 Force No Information
# Work on a copy of the raw weather data
cleaned_tempco4 <- temp180co4

# Logical flag per column: TRUE where the column is numeric
numeric_columns_temp <- sapply(cleaned_tempco4, is.numeric)

# Fill each numeric column's NA cells with that column's mean, rounded to
# one decimal place
cleaned_tempco4[numeric_columns_temp] <- lapply(
  cleaned_tempco4[numeric_columns_temp],
  function(column) {
    ifelse(is.na(column), round(mean(column, na.rm = TRUE), 1), column)
  }
)

# Turn the "YYYY-MM-DD" strings into Date values
cleaned_tempco4$Date <- ymd(cleaned_tempco4$Date)

# Discard the columns judged irrelevant (PreselevHp, SnowDepcm)
keep_column <- !(names(cleaned_tempco4) %in% c("PreselevHp", "SnowDepcm"))
cleaned_tempco4 <- cleaned_tempco4[, keep_column]
head(cleaned_tempco4)
## station_ID Date TemperatureCAvg TemperatureCMax TemperatureCMin TdAvgC
## 1 3590 2023-12-31 8.7 10.6 4.4 7.2
## 2 3590 2023-12-30 6.6 9.7 4.4 4.2
## 3 3590 2023-12-29 9.9 11.4 6.9 6.0
## 4 3590 2023-12-28 9.9 11.5 4.0 7.5
## 5 3590 2023-12-27 5.8 10.6 3.9 3.7
## 6 3590 2023-12-26 9.8 12.7 6.3 7.6
## HrAvg WindkmhDir WindkmhInt WindkmhGust PresslevHp Precmm TotClOct lowClOct
## 1 89.6 S 25.0 63.0 999.0 6.2 8.0 8.0
## 2 85.5 WSW 22.7 50.0 1006.9 0.4 4.6 6.5
## 3 77.2 SW 32.8 61.2 1003.6 0.8 6.5 6.7
## 4 84.6 SSW 32.2 70.4 1003.2 2.8 6.8 7.1
## 5 86.4 SW 13.2 37.1 1016.4 2.0 4.0 6.9
## 6 86.9 WSW 23.5 46.3 1006.2 4.4 6.5 7.4
## SunD1h VisKm
## 1 0.0 26.3
## 2 1.1 48.3
## 3 0.1 26.7
## 4 0.0 25.1
## 5 3.2 30.1
## 6 0.0 45.8
# Inspect the cleaned weather data: Date is now a Date column and the two dropped columns are gone
str(cleaned_tempco4)
## 'data.frame': 365 obs. of 16 variables:
## $ station_ID : int 3590 3590 3590 3590 3590 3590 3590 3590 3590 3590 ...
## $ Date : Date, format: "2023-12-31" "2023-12-30" ...
## $ TemperatureCAvg: num 8.7 6.6 9.9 9.9 5.8 9.8 12.5 10 9.6 10 ...
## $ TemperatureCMax: num 10.6 9.7 11.4 11.5 10.6 12.7 14.3 12 10.8 12.6 ...
## $ TemperatureCMin: num 4.4 4.4 6.9 4 3.9 6.3 9.5 8.4 8.1 8.1 ...
## $ TdAvgC : num 7.2 4.2 6 7.5 3.7 7.6 10.1 7 6.5 6.2 ...
## $ HrAvg : num 89.6 85.5 77.2 84.6 86.4 86.9 85.3 81.5 81.2 78.2 ...
## $ WindkmhDir : chr "S" "WSW" "SW" "SSW" ...
## $ WindkmhInt : num 25 22.7 32.8 32.2 13.2 23.5 34.1 32.7 34.1 37.5 ...
## $ WindkmhGust : num 63 50 61.2 70.4 37.1 46.3 72.3 61.2 68.6 77.8 ...
## $ PresslevHp : num 999 1007 1004 1003 1016 ...
## $ Precmm : num 6.2 0.4 0.8 2.8 2 4.4 0.8 0.8 0 2 ...
## $ TotClOct : num 8 4.6 6.5 6.8 4 6.5 7.8 5 8 7.5 ...
## $ lowClOct : num 8 6.5 6.7 7.1 6.9 7.4 7.8 6.7 8 7.5 ...
## $ SunD1h : num 0 1.1 0.1 0 3.2 0 0 2.9 0 1.4 ...
## $ VisKm : num 26.3 48.3 26.7 25.1 30.1 45.8 61.8 72.9 69.4 34.3 ...
# Inspect the cleaned crime data: date is now a Date column and the two dropped columns are gone
str(cleaned_crimeco4)
## 'data.frame': 6878 obs. of 10 variables:
## $ category : chr "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" ...
## $ persistent_id : chr "" "" "" "" ...
## $ date : Date, format: "2023-01-01" "2023-01-01" ...
## $ lat : num 51.9 51.9 51.9 51.9 51.9 ...
## $ long : num 0.909 0.902 0.898 0.902 0.895 ...
## $ street_id : int 2153366 2153173 2153077 2153186 2153012 2153379 2153105 2153541 2152937 2153107 ...
## $ street_name : chr "on or near military road" "on or near" "on or near culver street west" "on or near ryegate road" ...
## $ id : int 107596596 107596646 107595950 107595953 107595979 107595985 107596603 107596291 107596305 107596453 ...
## $ location_type : chr "Force" "Force" "Force" "Force" ...
## $ outcome_status: chr "No Information" "No Information" "No Information" "No Information" ...
# Confirm that no NA cells remain in either cleaned data frame
sum(is.na(cleaned_tempco4))
## [1] 0
sum(is.na(cleaned_crimeco4))
## [1] 0
Let's perform several analyses of the cleaned data to gain insight into local crime patterns and, potentially, to help improve the safety and well-being of the community.
# Word cloud of crime categories, built from `cleaned_crimeco4`.
# Count how many incidents fall into each category
crime_category_freq <- table(cleaned_crimeco4[["category"]])
# Flatten the counts into a data frame with descriptive column names
crime_category_df <- setNames(
  as.data.frame(crime_category_freq),
  c("Crime_Category", "Frequency")
)
# Draw the word cloud
wordcloud2(data = crime_category_df, size = 1.2)
# One-way table: incidents per crime category
category_table <- table(cleaned_crimeco4[["category"]])
# Each category's share of all incidents, in percent
category_table_percentage <- 100 * prop.table(category_table)
# Data-frame version of the counts with a formatted percentage column
category_table_df <- as.data.frame(category_table)
category_table_df[["Percentage"]] <- paste0(round(category_table_percentage, 2), "%")

# Two-way table: crime category (rows) by street name (columns)
two_way_table_street <- table(
  cleaned_crimeco4[["category"]],
  cleaned_crimeco4[["street_name"]]
)
# Row percentages: share of each category's incidents found on each street
two_way_table_street_percentage <- 100 * prop.table(two_way_table_street, margin = 1)
# Long-format data frame of the counts with the matching percentage column
two_way_table_street_df <- as.data.frame(two_way_table_street)
two_way_table_street_df[["Percentage"]] <- paste0(round(two_way_table_street_percentage, 2), "%")

# Show both tables as interactive DT widgets
datatable(data = category_table_df, caption = "Frequency Table for Category")
datatable(data = two_way_table_street_df, caption = "Two-way Table for Category and Street Name")
# plotly is attached again here (it was already loaded near the top)
library(plotly)
# Make sure the frequency table is a plain data frame
category_table_df <- as.data.frame(category_table_df)
# Horizontal bars: one bar per category, bar length = number of incidents
plot <- plot_ly(
  data = category_table_df,
  x = ~Freq,
  y = ~Var1,
  type = "bar",
  orientation = "h",
  name = "Category Frequency"
)
# Title and axis captions
plot <- layout(
  plot,
  title = "Crime Category Frequency",
  yaxis = list(title = "Category"),
  xaxis = list(title = "Frequency")
)
# Render the widget
plot
# Keep only the violent-crime incidents
violent_crime_data <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")
# Incidents per street, most affected streets first
street_counts <- violent_crime_data %>%
  count(street_name) %>%
  arrange(desc(n))
# Keep exactly ten streets. slice_max() keeps ties by default, which could
# return more than ten rows and leave slices without a palette colour.
top_10_streets <- street_counts %>%
  slice_max(n, n = 10, with_ties = FALSE)
# Qualitative palette with one colour per slice (the nine ColorBrewer "Set1"
# colours plus a tenth so no slice falls back to a default colour)
color_palette <- c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
                   "#ffff33", "#a65628", "#f781bf", "#999999", "#17becf")
# Pie chart of the top ten streets; slice text is hidden, details show on hover
pie_chart <- plot_ly(top_10_streets, labels = ~street_name, values = ~n, type = 'pie',
                     textinfo = 'none', marker = list(colors = color_palette)) %>%
  layout(title = "Distribution of Violent Crime Incidents for Top 10 Streets")
# plot_ly() already returns an interactive widget; ggplotly() is meant for
# ggplot objects, so the chart is simply kept under its existing name.
pie_chart_interactive <- pie_chart
# Display the interactive pie chart
pie_chart_interactive
# Stacked bars: one bar per street, segments coloured by crime category
p <- ggplot(
  data = cleaned_crimeco4,
  mapping = aes(x = street_name, fill = category)
)
p <- p + geom_bar()
# Street names are hidden on the x axis
p <- p + theme(axis.text.x = element_blank())
# Turn the static ggplot into a plotly widget and show it
p_interactive <- ggplotly(p)
p_interactive
# Fourteen fill colours, one per outcome status level in plotting order
custom_palette <- c(
  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628",
  "#F781BF", "#999999", "#FFA500", "#DECF3F", "#0072B2", "#D55E00", "#CC79A7"
)
# Side-by-side bars: incidents per category, split by outcome status
bar_plot <- ggplot(
  data = cleaned_crimeco4,
  mapping = aes(x = category, fill = outcome_status)
)
bar_plot <- bar_plot + geom_bar(position = "dodge")
# Category tick labels are suppressed and the manual palette is applied
bar_plot <- bar_plot +
  scale_x_discrete(labels = NULL) +
  scale_fill_manual(values = custom_palette)
# Titles and captions
bar_plot <- bar_plot +
  labs(
    title = "Bar Plot of Category and Outcome Status",
    x = "Category",
    y = "Count",
    fill = "Outcome Status"
  )
# Minimal theme, no legend, light grey panel background
bar_plot <- bar_plot +
  theme_minimal() +
  guides(fill = "none") +
  theme(panel.background = element_rect(fill = "lightgray"))
# Convert to a plotly widget and display it
bar_plot_interactive <- ggplotly(bar_plot)
bar_plot_interactive
# Restrict the data to violent-crime incidents
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")
# 2D density contours of incident date against latitude (contour lines only,
# no fill)
plot <- ggplot(data = violent_crimes, mapping = aes(x = date, y = lat))
plot <- plot + geom_density_2d(alpha = 0.5)
plot <- plot +
  labs(
    title = "2D Density Plot of Violent Crimes",
    x = "Date",
    y = "Latitude"
  )
# Minimal theme with a grey panel background
plot <- plot +
  theme_minimal() +
  theme(panel.background = element_rect(fill = "grey90"))
# Convert to a plotly widget and display it
plot_interactive <- ggplotly(plot)
plot_interactive
# Relationship between incident category and outcome status.
# Both variables are categorical, so a box plot (which summarises a numeric
# variable) cannot describe them. geom_count() instead draws one point per
# category/outcome combination, sized by the number of incidents.
g <- ggplot(cleaned_crimeco4, aes(x = factor(category), y = outcome_status))
# One sized point per (category, outcome) pair
g <- g + geom_count(color = "blue", alpha = 0.6)
# Set labels and title
g <- g + labs(title = "Outcome Status of Incidents by Category",
              x = "Incident Category",
              y = "Outcome Status",
              size = "Incidents")
# Customize the plot appearance
g <- g + theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), # Rotate and align labels
        axis.text.y = element_blank(),                     # Outcome labels are long; read them from the hover tooltip
        panel.background = element_rect(fill = "grey90"))  # Grey panel background
# Convert ggplot object to plotly object (note: `g` is a plotly widget from here on)
g <- ggplotly(g)
# Display the interactive plot
g
# Keep only the violent-crime incidents
violent_crime_data <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")
# Violin plot: distribution of incident latitude for each outcome status
violin_plot <- ggplot(violent_crime_data, aes(x = outcome_status, y = lat, fill = outcome_status)) +
  geom_violin() +
  labs(title = "Outcome Status of Violent Crimes") +
  theme_minimal() +
  theme(axis.text.x = element_blank(), # Remove x-axis labels
        axis.text.y = element_blank(), # Remove y-axis labels
        legend.position = "none")      # Remove legend
# Convert ggplot object to plotly object. The former `source_data` argument was
# dropped: ggplotly() has no such parameter, so it was silently ignored.
violin_plot_interactive <- ggplotly(violin_plot)
# Display the interactive plot
violin_plot_interactive
library(viridis)
## Loading required package: viridisLite
# `date` is already a Date column: it was parsed with ym() from "YYYY-MM"
# strings, so every incident is stamped with the first day of its month and the
# counts below are MONTHLY, not daily. (The variable keeps its historical name
# because later code refers to `daily_crime_counts`.)
daily_crime_counts <- cleaned_crimeco4 %>%
  group_by(date, category) %>%
  summarise(count = n(), .groups = "drop")
# Get the number of unique categories
num_categories <- length(unique(daily_crime_counts$category))
# One viridis colour per category
custom_palette <- viridis(num_categories)
# Interactive time series: one line per crime category
plot <- plot_ly(daily_crime_counts, x = ~date, y = ~count, color = ~category,
                type = 'scatter', mode = 'lines', name = ~category,
                colors = custom_palette) %>%
  layout(title = "Monthly Crime Counts by Category",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Count"))
# Display the interactive plot
plot
# Keep only the violent-crime incidents
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")
# `date` is already a Date with month resolution (first day of each month), so
# no re-parsing is needed and the counts below are MONTHLY. The variable keeps
# its historical name.
daily_violent_crime_counts <- violent_crimes %>%
  group_by(date) %>%
  summarise(count = n(), .groups = "drop") # already ungrouped by `.groups`
# Fit a loess curve of count against the numeric date
smoothed_count <- loess(count ~ as.numeric(date), data = daily_violent_crime_counts)
# Interactive plot: raw monthly counts plus the loess fit at the same dates
plot <- plot_ly(daily_violent_crime_counts, x = ~date, y = ~count,
                type = 'scatter', mode = 'lines', name = 'Monthly Counts') %>%
  add_trace(x = daily_violent_crime_counts$date, y = predict(smoothed_count),
            mode = 'lines', name = 'Smoothed') %>%
  layout(title = "Monthly Violent Crime Counts with Smoothing",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Count"))
# Display the interactive plot
plot
# Crime counts per date. Crime dates have month resolution only (each is the
# first day of its month), so this is one count per month.
daily_crime_counts <- cleaned_crimeco4 %>%
  group_by(date) %>%
  summarise(count = n(), .groups = "drop")
# Roll the daily weather up to months BEFORE joining. Joining the daily table
# directly on `date` would match only the weather of the 1st of each month and
# present that single day as if it described the whole month.
monthly_temperature <- cleaned_tempco4 %>%
  mutate(date = floor_date(Date, "month")) %>%
  group_by(date) %>%
  summarise(TemperatureCAvg = mean(TemperatureCAvg, na.rm = TRUE),
            .groups = "drop")
# Merge monthly crime counts with monthly mean temperature
merged_data <- left_join(daily_crime_counts, monthly_temperature, by = "date")
# Time series with both the crime counts and the mean temperature
time_series_plot <- plot_ly(merged_data, x = ~date) %>%
  add_trace(y = ~count, mode = "lines", name = "Crime Counts", type = 'scatter', line = list(color = 'red')) %>%
  add_trace(y = ~TemperatureCAvg, mode = "lines", name = "Average Temperature", type = 'scatter', line = list(color = 'blue')) %>%
  layout(title = "Crime Counts and Temperature Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Count/Temperature"),
         plot_bgcolor = "rgba(211,211,211,0.2)",  # Light grey, mostly transparent plot area
         paper_bgcolor = "rgba(211,211,211,0.2)") # Same tint for the surrounding paper
time_series_plot
# Load the plotly library
library(plotly)
# Keep only the violent-crime incidents (their `date` is already a Date with
# month resolution: the first day of each month)
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")
# Monthly temperature extremes from the daily weather table. Joining the daily
# table directly on `date` would only ever pick up the weather of the 1st of
# each month, so the weather is aggregated to months first.
monthly_extremes <- cleaned_tempco4 %>%
  mutate(date = floor_date(Date, "month")) %>%
  group_by(date) %>%
  summarise(min_temp = min(TemperatureCMin, na.rm = TRUE),
            max_temp = max(TemperatureCMax, na.rm = TRUE),
            .groups = "drop")
# One row per month with violent crime, carrying that month's extremes
crime_weather_data <- violent_crimes %>%
  count(date, name = "violent_crime_count") %>%
  left_join(monthly_extremes, by = "date")
# Scatter plot of the monthly minimum and maximum temperature
scatter_plot <- plot_ly(data = crime_weather_data, x = ~date) %>%
  add_trace(y = ~min_temp, name = "Minimum Temperature", type = 'scatter', mode = 'markers', marker = list(color = 'blue')) %>%
  add_trace(y = ~max_temp, name = "Maximum Temperature", type = 'scatter', mode = 'markers', marker = list(color = 'red')) %>%
  layout(title = "Minimum and Maximum Temperature During Violent Crimes",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Temperature (°C)", side = "left"),
         # (the unused secondary axis was removed: no trace was assigned to it)
         legend = list(x = 1.05, y = 1, bgcolor = "rgba(255, 255, 255, 0.5)"), # Legend to the right of the plot
         margin = list(r = 150),                  # Extra right margin for the legend
         plot_bgcolor = "rgba(211,211,211,0.2)",  # Light grey, mostly transparent plot area
         paper_bgcolor = "rgba(211,211,211,0.2)") # Same tint for the surrounding paper
# Always show the plotly mode bar
scatter_plot <- scatter_plot %>% config(displayModeBar = TRUE)
# Display the interactive plot
scatter_plot
# Select relevant weather variables from cleaned_tempco4 (daily values)
weather_data <- cleaned_tempco4[, c("Date", "TemperatureCAvg", "Precmm", "SunD1h")]
# Crime dates have month resolution only (first day of each month), so the
# daily weather is averaged per month before joining; a direct join on `date`
# would use only the weather of the 1st of each month.
monthly_weather <- weather_data %>%
  mutate(date = floor_date(Date, "month")) %>%
  group_by(date) %>%
  summarise(TemperatureCAvg = mean(TemperatureCAvg, na.rm = TRUE),
            Precmm = mean(Precmm, na.rm = TRUE),
            SunD1h = mean(SunD1h, na.rm = TRUE),
            .groups = "drop")
# Number of crime incidents per month
crime_count <- cleaned_crimeco4 %>%
  group_by(date) %>%
  summarise(crime_count = n(), .groups = "drop")
# Merge monthly crime counts with the monthly mean weather
merged_data <- left_join(crime_count, monthly_weather, by = "date")
# Step 2: Pairwise correlations between all columns except the date
correlation_matrix <- cor(merged_data[, -1], use = "complete.obs")
# Step 3: Heatmap of the correlation matrix, labelled with the variable names
# so each cell can be identified
heatmap_plot <- plot_ly(x = colnames(correlation_matrix),
                        y = rownames(correlation_matrix),
                        z = correlation_matrix,
                        type = "heatmap", colorscale = "Viridis") %>%
  layout(title = "Correlation Between Crime Incidents and Weather Variables",
         xaxis = list(title = "Weather Variables"),
         yaxis = list(title = "Weather Variables"),
         margin = list(l = 100, b = 100)) # Adjust margins for better display
# Step 4: Identify notable correlations
# Inspect the heatmap visually for strong positive or negative correlations
# Display the interactive heatmap
heatmap_plot
# Step 1: Prepare the data
# Keep only the violent-crime incidents
violent_crime_data <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")
# Number of violent-crime incidents per month (crime dates have month
# resolution only: each is the first day of its month)
violent_crime_count <- violent_crime_data %>%
  group_by(date) %>%
  summarise(violent_crime_count = n(), .groups = "drop")
# Select relevant weather variables from cleaned_tempco4 (daily values)
weather_data <- cleaned_tempco4[, c("Date", "TemperatureCAvg", "Precmm", "SunD1h")]
# Average the daily weather per month before joining; a direct join on `date`
# would use only the weather of the 1st of each month.
monthly_weather <- weather_data %>%
  mutate(date = floor_date(Date, "month")) %>%
  group_by(date) %>%
  summarise(TemperatureCAvg = mean(TemperatureCAvg, na.rm = TRUE),
            Precmm = mean(Precmm, na.rm = TRUE),
            SunD1h = mean(SunD1h, na.rm = TRUE),
            .groups = "drop")
# Merge monthly violent-crime counts with the monthly mean weather
merged_data <- left_join(violent_crime_count, monthly_weather, by = "date")
# Step 2: Pairwise correlations between all columns except the date
correlation_matrix <- cor(merged_data[, -1], use = "complete.obs")
# Step 3: Heatmap of the correlation matrix, labelled with the variable names
# so each cell can be identified
heatmap_plot <- plot_ly(x = colnames(correlation_matrix),
                        y = rownames(correlation_matrix),
                        z = correlation_matrix,
                        type = "heatmap", colorscale = "Portland") %>%
  layout(title = "Correlation Between Violent Crime Incidents and Weather Variables",
         xaxis = list(title = "Weather Variables"),
         yaxis = list(title = "Weather Variables"),
         margin = list(l = 100, b = 100)) # Adjust margins for better display
# Step 4: Identify notable correlations
# Inspect the heatmap visually for strong positive or negative correlations
# Display the interactive heatmap
heatmap_plot
# Average the daily weather per month before joining. `daily_crime_counts`
# holds one row per month (crime dates are the first day of each month), so a
# direct join on `date` would plot only the weather of the 1st of each month.
monthly_weather <- cleaned_tempco4 %>%
  mutate(date = floor_date(Date, "month")) %>%
  group_by(date) %>%
  summarise(TemperatureCAvg = mean(TemperatureCAvg, na.rm = TRUE),
            Precmm = mean(Precmm, na.rm = TRUE),
            SunD1h = mean(SunD1h, na.rm = TRUE),
            .groups = "drop")
# Merge crime counts with the monthly mean weather
merged_data <- left_join(daily_crime_counts, monthly_weather, by = "date")
# One line plot per weather variable
temp_plot <- plot_ly(merged_data, x = ~date) %>%
  add_trace(y = ~TemperatureCAvg, name = "Average Temperature", type = 'scatter', mode = 'lines', line = list(color = 'blue')) %>%
  layout(title = "Average Temperature Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Average Temperature (°C)"))
precipitation_plot <- plot_ly(merged_data, x = ~date) %>%
  add_trace(y = ~Precmm, name = "Precipitation", type = 'scatter', mode = 'lines', line = list(color = 'green')) %>%
  layout(title = "Precipitation Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Mean Daily Precipitation (mm)"))
sunlight_plot <- plot_ly(merged_data, x = ~date) %>%
  add_trace(y = ~SunD1h, name = "Sunlight Hours", type = 'scatter', mode = 'lines', line = list(color = 'orange')) %>%
  layout(title = "Sunlight Hours Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Mean Daily Sunlight Hours"))
# Stack the three plots vertically. NOTE: the result is stored in a variable
# that shares its name with plotly's subplot() function; the name is kept so
# any later reference to `subplot` still finds the widget.
subplot <- subplot(temp_plot, precipitation_plot, sunlight_plot, nrows = 3)
# Always show the plotly mode bar
subplot <- subplot %>% config(displayModeBar = TRUE)
# Display the subplot
subplot
library(MASS) # Load the MASS package for kde2d function
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
# Keep only incidents that have both coordinates
crime_data <- cleaned_crimeco4 %>%
  filter(!is.na(lat) & !is.na(long))
# 2D kernel density estimate of incident locations (x = longitude,
# y = latitude). NOTE: this variable shares its name with stats::density();
# the name is kept so later references still work.
density <- kde2d(crime_data$long, crime_data$lat)
# Heatmap of the estimated density.
# kde2d() returns z as a matrix indexed [x, y], whereas a plotly heatmap reads
# the ROWS of z along the y axis, so z is transposed. The grid coordinates are
# passed explicitly so the axes show real longitude/latitude values instead of
# grid cell indices.
density_plot <- plot_ly(x = density$x, y = density$y, z = t(density$z),
                        type = "heatmap", colorscale = "Viridis",
                        zauto = FALSE, zmin = 0, zmax = max(density$z)) %>%
  layout(title = "Point Density Plot of Crime Incidents",
         xaxis = list(title = "Longitude"),
         yaxis = list(title = "Latitude"))
# Display the interactive point density plot
density_plot
# Map dates to meteorological seasons (northern hemisphere).
#
# Vectorised: accepts a single date or a whole vector and returns a character
# vector of the same length, so it can be called directly on a column as well
# as element-wise through sapply()/vapply().
#
# Args:
#   date: Date, POSIXct/POSIXlt, or date-like character values. Plain numeric
#         input is treated as month numbers (1-12).
# Returns:
#   Character vector with "Spring" (Mar-May), "Summer" (Jun-Aug),
#   "Fall" (Sep-Nov) or "Winter" (Dec-Feb); NA for missing dates.
get_season <- function(date) {
  # Month number 1-12; POSIXlt stores months as 0-11
  month_num <- if (is.numeric(date)) date else as.POSIXlt(date)$mon + 1L
  # Start from "Winter" and overwrite the other three seasons
  season <- rep("Winter", length(month_num))
  season[month_num %in% 3:5] <- "Spring"
  season[month_num %in% 6:8] <- "Summer"
  season[month_num %in% 9:11] <- "Fall"
  # A missing date has no season (it must not fall through to "Winter")
  season[is.na(month_num)] <- NA_character_
  season
}
# Tag every incident with its season. vapply() pins the result type to one
# string per date, unlike sapply(), whose return type depends on its input.
cleaned_crimeco4 <- cleaned_crimeco4 %>%
  mutate(season = factor(vapply(date, get_season, character(1))))
# Total number of incidents per season
crime_counts_season <- cleaned_crimeco4 %>%
  group_by(season) %>%
  summarise(crime_count = n(), .groups = "drop")
# Seasonal averages of the main weather variables
weather_season <- cleaned_tempco4 %>%
  mutate(season = factor(vapply(Date, get_season, character(1)))) %>%
  group_by(season) %>%
  summarise(avg_temp = mean(TemperatureCAvg, na.rm = TRUE),
            avg_precipitation = mean(Precmm, na.rm = TRUE),
            avg_sunshine = mean(SunD1h, na.rm = TRUE),
            .groups = "drop")
# Incidents per month within each season. A box plot needs several
# observations per group; the seasonal totals above give only one value per
# season, which would render as a flat line instead of a box.
monthly_crime_counts_season <- cleaned_crimeco4 %>%
  group_by(season, date) %>%
  summarise(crime_count = n(), .groups = "drop")
# Interactive box plot: spread of the monthly incident counts in each season
crime_box_plot <- plot_ly(data = monthly_crime_counts_season, x = ~season, y = ~crime_count, type = "box") %>%
  layout(title = "Crime Incidents by Season",
         xaxis = list(title = "Season"),
         yaxis = list(title = "Crime Count"))
# Display the interactive plot
crime_box_plot
# Violent-crime incidents only, each tagged with the season of its date
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime") %>%
  mutate(season = factor(sapply(date, get_season)))
# Number of violent-crime incidents in each season
violent_crime_counts_season <- summarise(
  group_by(violent_crimes, season),
  violent_crime_count = n()
)
# Bar chart: one bar per season, bar height = number of violent crimes
violent_crime_bar_plot <- plot_ly(
  data = violent_crime_counts_season,
  x = ~season,
  y = ~violent_crime_count,
  type = "bar"
)
violent_crime_bar_plot <- layout(
  violent_crime_bar_plot,
  title = "Violent Crime Incidents by Season",
  xaxis = list(title = "Season"),
  yaxis = list(title = "Violent Crime Count")
)
# Render the widget
violent_crime_bar_plot
# List the column names of both cleaned data frames (the crime data now also carries `season`)
colnames(cleaned_crimeco4)
## [1] "category" "persistent_id" "date" "lat"
## [5] "long" "street_id" "street_name" "id"
## [9] "location_type" "outcome_status" "season"
colnames(cleaned_tempco4)
## [1] "station_ID" "Date" "TemperatureCAvg" "TemperatureCMax"
## [5] "TemperatureCMin" "TdAvgC" "HrAvg" "WindkmhDir"
## [9] "WindkmhInt" "WindkmhGust" "PresslevHp" "Precmm"
## [13] "TotClOct" "lowClOct" "SunD1h" "VisKm"
library(leaflet)
# Path to the downloaded marker icon. NOTE(review): this is an absolute,
# machine-specific path; consider shipping the icon alongside the script.
icon_path <- "/Users/nithyashree/Downloads/icons8-high-risk-16.png"
# Build the custom 16x16 icon only when the file is actually present; otherwise
# fall back to leaflet's default marker (icon = NULL) so the map still renders
# on machines that do not have the file.
customIcon <- if (file.exists(icon_path)) {
  makeIcon(
    iconUrl = icon_path, # Local file path to the icon
    iconWidth = 16,      # Icon width
    iconHeight = 16      # Icon height
  )
} else {
  warning("Icon file not found, using default markers: ", icon_path,
          call. = FALSE)
  NULL
}
# Leaflet map with one marker per incident; the popup shows category and date
crime_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    data = cleaned_crimeco4,
    lng = ~long,
    lat = ~lat,
    icon = customIcon, # Custom icon, or NULL for the default marker
    popup = ~paste("Category: ", category, "<br>Date: ", date) # Specify popup content
  )
# Save the map as an HTML file. saveWidget() lives in htmlwidgets (installed as
# a dependency of leaflet); the namespace is spelled out so the call does not
# rely on another attached package re-exporting it.
htmlwidgets::saveWidget(crime_map, file = "crime_map.html")